// Step 1: proximity between labex and NAF
// Reads  ${tmp}/matrix_issn_labex, ${tmp}/matrix_ape_issn and ${data}/Utils/issn_to_keep.
// Writes ${tmp}/Proximity_naflabex (prox and proxHerf1-proxHerf5 by labexid and ape)
//        ${tmp}/proxlabex (sum and mean of prox by labexid, stored under the name mainlabex).

use "${tmp}/matrix_issn_labex", clear
	// Pair every labex row with every ape row that shares the same issn
	joinby issn using "${tmp}/matrix_ape_issn"


	// keep(1 3) keeps every master row: this merge adds variables, it does not filter journals
	merge m:1 issn using "${data}/Utils/issn_to_keep", keep(1 3) nogen
	
	// Variable names are written in full (labexid, not the abbreviation labex) so the code
	// does not depend on -set varabbrev on-; egen total() is the documented name of sum().
	forvalues lim = 1(1)5 {
		// Labex share carried by journals whose herfi exceeds `lim'/10 (missing herfi excluded)
		bysort labexid ape: egen foo = total(share_labex_issn * (herfi > `lim'/10 & herfi != .))
		// NOTE(review): the numerator below is NOT restricted to those journals, so
		// proxHerf`lim' equals the unrestricted proximity divided by foo - confirm this is intended.
		gen share_labex_issn`lim' = share_labex_issn / foo
		drop foo
		bysort ape labexid: egen proxHerf`lim' = total(share_labex_issn`lim' * share_ape_issn)
		drop share_labex_issn`lim'
	}
	// Baseline proximity: sum over journals of labex share times ape share
	bysort ape labexid: egen share = total(share_labex_issn * share_ape_issn)
	keep labexid share ape nb_papers_tot proxHerf*
	gduplicates drop

	
	rename share prox
	recode prox* (missing = 0)
	
	// Side file: total and average proximity of each labex over the ape codes kept above
	preserve
		gcollapse (sum) s_prox = prox (mean) m_prox = prox, by(labexid)
		rename s_prox s_proxlabex
		rename m_prox m_proxlabex
		rename labexid mainlabex
		save "${tmp}/proxlabex", replace
	restore
	
	
save "${tmp}/Proximity_naflabex", replace


// Step 2: predicted exposure
// Cleans ${data}/Source/main_labex, harmonises the 2010 scores with the 2011 ones, predicts
// dotation from a Poisson regression and writes ${tmp}/main_labex_with_ctrf.
use  "${data}/Source/main_labex", clear
	
	replace note_totale = 30 if inlist(labexid, "i41", "i2") // Missing info on note_totale but accepted labex
	// NOTE(review): corresp is resolved through Stata's variable abbreviation; presumably it
	// stands for correspondance - confirm against the source file before spelling it out.
	replace labexid = correspondance if substr(corresp,1,1) == "i" // replace first submissions for resubmissions in 2nd wave (starting with i)
	bys labexid: gen N = _N
	drop if correspondance != "" & labexid != correspondance & N != 1

	// Mean of note_totale in each year, used to rescale the 2010 scores
	qui su note_totale if year == 2010
	local m1 = r(mean)
	qui su note_totale if year == 2011
	local m2 = r(mean)
	
	foreach var of varlist note_? note_totale {
		// 2010 scores are multiplied by the 2011/2010 ratio of mean note_totale ...
		replace `var' = `var'*`m2' / `m1' if year == 2010
		// ... and every score (both years) is floored to the half point below
		replace `var' = floor(`var'*2)/2
	}

	keep latitude longitude dotation note_totale accept fund_req field_* year labexid categ note_?
	// Share of the requested funding that was granted, capped at 1
	gen ratio = dotation / fund_req
	replace ratio = 1 if ratio > 1 & ratio != .
	replace categ = 5 if categ == .
	tab categ, gen(cat_)
	gen log_req = log(fund_req)
	gen note_round = round(note_totale)
	
	// Flag rows where field_health is missing before missing field_* are set to 0
	gen field_ukn = field_health ==.
	recode field_* (missing =0)
	replace note_round = 0 if note_round < 23
	
	// Estimating dotation
	poisson dotation log_req field_*  i.year 
	predict foo, xb
	gen dotation_pred = exp(foo)
	
	// Diagnostics only: observed mean versus predicted mean where dotation is observed
	// (the local m is not used in the rest of this step)
	su dotation
	local m = r(mean)
	su dotation_pred if dotation != .
	
	drop log_req field_* foo

	keep labexid note_totale dotation_pred note_round dotation accept note_? year
save "${tmp}/main_labex_with_ctrf", replace


use "${tmp}/Proximity_naflabex", clear
	// Attach scores and (predicted) dotation; keep(3) keeps only labexid found in both files
	merge m:1 labexid using "${tmp}/main_labex_with_ctrf", keep(3) nogen
	gen yearfin = 2010 if substr(labexid,1,1) == "i"
	replace yearfin = 2011 if substr(labexid,1,1) == "p"
	
	// Exposure = proximity times (observed or predicted) dotation
	gen exposure = prox*dotation
	gen exposure_pred = prox*dotation_pred
	merge m:1 labexid using "${data}/Utils/labexcom", nogen keep(1 3)
	// Paris, Lyon and Marseille have special codes for districts: aggregate 
	replace depcom = 75056 if floor(depcom/1000) == 75
	replace depcom = 13055 if inrange(depcom,13201,13216)
	replace depcom = 69123 if inrange(depcom,69381,69389)

	merge m:1 depcom using "${data}/Utils/depcom_ZE_mapping", nogen keep(1 3)
	
	// Main labex of a ZE2010 x ape cell: the labexid with the largest predicted exposure.
	// Empty strings sort first, so [_N] spreads the non-empty id to the whole cell
	// (with ties, the last labexid in sort order is retained).
	bysort ZE2010 ape: egen foo = max(exposure_pred)
	gen mainlabex = labexid if foo == exposure_pred

	bysort ZE2010 ape (mainlabex): replace mainlabex = mainlabex[_N] 
	
	// Share of the cell's predicted exposure coming from its main labex.
	// labexid is written in full instead of the abbreviation labex, and egen total()
	// replaces its undocumented synonym sum().
	bysort ZE2010 ape: egen den = total(exposure_pred)
	gen share_exposure_maxlabex = exposure_pred / den if labexid == mainlabex
	drop den foo
	
	// Herfindal
	forvalues i = 1/5 {
		gen exposure_Herf`i' = proxHerf`i'*dotation
	}
	
	gen exposure_grade = prox*note_totale 
	// Predicted exposure counted only where accept is 0
	gen exposure_pred_ref = prox*dotation_pred*(1-accept)
		
	// Side file ${tmp}/status_ZENAF: one row per ZE2010 x ape pair of the balanced grid that
	// also appears in the DADS file, with flags for zones / sectors having a funded labex.
	preserve
	
		// 1 when at least one row of the zone (resp. sector) has a non-missing dotation
		bysort ZE2010: egen ZE_labex_accepted = max(dotation != .)
		bysort ape: egen NAF_labex_accepted = max(dotation != .)
		
		keep ZE2010 ape ZE_labex_accepted NAF_labex_accepted
		gduplicates drop
		
		// Numeric ids are built before the empty identifiers are removed, as tsfill works
		// on the numeric panel / time pair
		egen ze_id = group(ZE2010)
		egen ape_id = group(ape)
		drop if ZE2010 == ""
		drop if ape == ""
		xtset ze_id ape_id
		tsfill, full
		
		// Rows created by tsfill have empty strings: copy the identifiers back from
		// the last (non-empty) value of each id
		bysort ze_id (ZE2010): replace ZE2010 = ZE2010[_N] if ZE2010 == ""
		bysort ape_id (ape): replace ape = ape[_N] if ape == ""
		drop ze_id ape_id
		merge 1:m  ape ZE2010 using "${tmp}/DADSpri_RD_ZE_NAF_0419", keep(3) nogen keepusing(ape ZE2010)
		duplicates drop
		
		// Spread the zone-level flag to the rows added by tsfill (missing there)
		bysort ZE2010: egen ZE_labex_accepted2 = max(ZE_labex_accepted == 1)
		drop ZE_labex_accepted
		rename ZE_labex_accepted2 ZE_labex_accepted

		// Same for the sector-level flag
		bysort ape: egen NAF_labex_accepted2 = max(NAF_labex_accepted == 1)
		drop NAF_labex_accepted
		rename NAF_labex_accepted2 NAF_labex_accepted
		
		// Every pair saved here is marked as candidate
		generate NAF_labex_candidate = 1
		generate ZE_labex_candidate = 1
		save "${tmp}/status_ZENAF", replace
	restore
	
	// Merging other measures constructed at the ze x ape level

	// Main labex "without border": the labexid with the largest predicted exposure within
	// an ape, whatever its ZE2010. The maximum has to be taken over the labex-level
	// exposure_pred; comparing an ape-level total with its own within-cell maximum is
	// always true and cannot single out a labex. Empty strings sort first, so [_N]
	// spreads the selected id (last in sort order when tied) to the whole ape.
	bysort ape: egen foo2 = max(exposure_pred)
	gen mainlabex_noborder = labexid if foo2 == exposure_pred
	bysort ape (mainlabex_noborder): replace mainlabex_noborder = mainlabex_noborder[_N] 
	drop foo2
	
	collapse (sum) prox* exposure* dotation*    (max) share_exposure_maxlabex* (first) mainlabex* , by(ZE2010 ape)

	// "No border" exposures: ape-level totals over every labex, including those without a
	// ZE2010. They are computed AFTER the collapse so that the (sum) above does not add the
	// same ape total once per labex row of a cell, and BEFORE cells with an empty ZE2010
	// are dropped so that those labex still contribute to the total.
	bysort ape: egen exposure_noborder = total(exposure)
	bysort ape: egen exposure_noborder_pred = total(exposure_pred)

	// Keep only cells with both identifiers. Building the full ZE2010 x ape rectangle with
	// xtset / tsfill and then keeping only the pre-existing rows leads to the same rows.
	drop if ape == "" | ZE2010 == ""
	sort ZE2010 ape
	
	
save  "${tmp}/Proximity_nafZE", replace



// Final assembly: DADS ZE2010 x ape rows with the proximity / exposure measures and the
// candidate / accepted flags attached.
use "${tmp}/DADSpri_RD_ZE_NAF_0419", clear
	drop if mi(ZE2010) | mi(ape)

	// keep(1 3): rows without any labex measure are retained and set to 0 below
	merge m:1 ZE2010 ape using "${tmp}/Proximity_nafZE", nogen keep(1  3)
	// keep(3): only ZE2010 x ape pairs present in status_ZENAF remain.
	// The key is written in full (ZE2010) instead of the abbreviation ZE.
	merge m:1 ape ZE2010 using "${tmp}/status_ZENAF", keep(3) nogen
	// NOTE(review): prox matches the variable prox exactly, so proxHerf* are left missing
	// for unmatched rows while exposure_Herf* are set to 0 - confirm this is intended.
	recode prox exposure* (missing = 0)
save "${tmp}/exposure_main_file", replace